# Open the help page for the aus_production dataset
help("aus_production")
# Reduce each dataset to its time index plus the one measurement we will plot
aus_production <- aus_production |>
  select(Quarter, Bricks)
pelt <- pelt |>
  select(Year, Lynx)
gafa_stock <- gafa_stock |>
  select(Date, Close)
vic_elec <- vic_elec |>
  select(Time, Demand)
Looking at each of our datasets, the timescale of each is listed below:
aus_production - Quarterly
pelt - Yearly
gafa_stock - Daily (trading days)
vic_elec - Half-Hourly
# Plotting each series using `autoplot`
# Time plot of each selected series
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).
pelt |> autoplot(Lynx)
gafa_stock |> autoplot(Close)
vic_elec |> autoplot(Demand)
# Same Victorian electricity demand plot, this time with custom axis labels
vic_elec |>
  autoplot(Demand) +
  labs(x = "Half-Hours", y = "Electricity Demand")
To find the peak (max) closing price for each stock, we’ll need to
first group our data then filter by our value (in this case
Close):
# Within each stock symbol, keep the row(s) whose Close equals that symbol's
# maximum closing price
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close))
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Date Close Symbol
## <date> <dbl> <chr>
## 1 2018-10-03 232. AAPL
## 2 2018-09-04 2040. AMZN
## 3 2018-07-25 218. FB
## 4 2018-07-26 1268. GOOG
# Load the tute1 CSV (path is relative to the working directory)
tute1 <- readr::read_csv(file = "../data/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the document is rendered or run as a script.
if (interactive()) {
  View(tute1)
}
# Convert the Quarter column to a yearquarter index and build a tsibble
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
# Reshape to long format (one row per Quarter and series name) and draw each
# series in its own panel with an independent y-axis
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")
Now let’s remove
facet_grid
# Same long-format line plot as before, but with the facet_grid() layer
# deliberately left off so every series is drawn in one shared panel
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()
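Without facet_grid, all of the series are drawn in the same panel and share a single y-axis. Because the series sit at different levels, the shared scale flattens the ones with a smaller range and makes their variation harder to see.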
library(USgas)
# Turn us_total into a tsibble indexed by year with one series per state
us_total <- us_total |>
  as_tsibble(index = year, key = state)
# Plot consumption for New England.
# Match against a vector of state names with %in%; the previous chain of `==`
# comparisons used " New Hampshire" (leading space), which never matched and
# silently dropped that state from the plot.
new_england <- us_total |>
  filter(
    state %in% c(
      "Massachusetts",
      "Vermont",
      "New Hampshire",
      "Maine",
      "Connecticut",
      "Rhode Island"
    )
  )
autoplot(new_england, y) +
  labs(
    x = "Year",
    y = "Annual Gas Consumption (millions of cubic feet)"
  )
# Read the tourism spreadsheet and build a tsibble from it.
# Quarter is converted to a yearquarter value (1998-01-01 => 1998 Q1) and used
# as the index; each Region / State / Purpose combination is its own series.
tourism_xl <- readxl::read_excel(path = "../data/tourism.xlsx") |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(key = c(Region, State, Purpose), index = Quarter)
Finding which combination of Region and
Purpose had the maximum number of overnight trips on
average.
# Find the Region / Purpose combination with the highest average Trips.
# The data is converted to a plain tibble first so that summarise() collapses
# over time instead of keeping one row per quarter. Each Region / Purpose pair
# is then reduced to a single mean, and only after dropping the grouping is the
# overall maximum selected. (Using mutate() and filtering inside the groups
# keeps every row, because the group mean always equals its own maximum.)
mean_trips <- tourism_xl |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(trips = mean(Trips), .groups = "drop") |>
  filter(trips == max(trips))
mean_trips
## # A tsibble: 24,320 x 6 [1Q]
## # Key: Region, State, Purpose [304]
## # Groups: Region, Purpose [304]
## Quarter Region State Purpose Trips trips
## <qtr> <chr> <chr> <chr> <dbl> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135. 156.
## 2 1998 Q2 Adelaide South Australia Business 110. 156.
## 3 1998 Q3 Adelaide South Australia Business 166. 156.
## 4 1998 Q4 Adelaide South Australia Business 127. 156.
## 5 1999 Q1 Adelaide South Australia Business 137. 156.
## 6 1999 Q2 Adelaide South Australia Business 200. 156.
## 7 1999 Q3 Adelaide South Australia Business 169. 156.
## 8 1999 Q4 Adelaide South Australia Business 134. 156.
## 9 2000 Q1 Adelaide South Australia Business 154. 156.
## 10 2000 Q2 Adelaide South Australia Business 169. 156.
## # ℹ 24,310 more rows
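Note that the output above still contains all 24,320 rows: the group mean `trips` is constant within each Region and Purpose group, so the grouped filter keeps every row, and Business in Adelaide appears first only because of the sort order. To identify the combination with the highest average, reduce each Region and Purpose group to a single mean and then take the largest of those means.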
Now we can get the total trips by state using similar
group_by functionality:
# Total trips by state using group_by(): summing over every Region and Purpose
# within a State leaves one quarterly series per state. Uses the tourism_xl
# tsibble built from the spreadsheet in this section, and names the summary
# column explicitly instead of leaving it as `sum(Trips)`.
total_trips <- tourism_xl |>
  group_by(State) |>
  summarise(Trips = sum(Trips))
total_trips
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter `sum(Trips)`
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
# Keep only the "Total Private" employment series, with its Month index
us_emp <- fpp3::us_employment |>
  filter(Title == "Total Private") |>
  select(Month, Employed)
# Time plot, seasonal plot, lag plot and subseries plot of US employment
us_emp |> autoplot(Employed)
us_emp |> gg_season(Employed)
us_emp |> gg_lag(Employed)
us_emp |> gg_subseries(Employed)
# Narrow each dataset down to the index and the variable to be explored
pelt <- tsibbledata::pelt |>
  select(Year, Hare)
us_gas <- us_gasoline |>
  select(Week, Barrels)
# Only the H02 rows of PBS are kept
pbs <- PBS |>
  filter(ATC2 == "H02") |>
  select(Month, Cost)
Plotting Pelt data first
# Time plot, lag plot and subseries plot of the Hare series
pelt |> autoplot(Hare)
# gg_season() is left out: this is yearly data, so there is no seasonality
# pelt |> gg_season(Hare)
pelt |> gg_lag(Hare)
pelt |> gg_subseries(Hare)
Now we can plot from our aus_production dataset
# Time, seasonal, lag and subseries plots of Bricks (rendered warnings kept)
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).
aus_production |> gg_season(Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).
aus_production |> gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).
aus_production |> gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values (`geom_line()`).
This data is a bit less granular (quarterly, instead of monthly).
However, seasonal cycles can still be observed within a given year.
There was an outlier year in the early 80s, likely due to a larger
economic issue.
Now we can plot our PBS Cost data. We definitely see seasonality within these time series, as well as a general increase over a longer time scale. Feb - May seems to be a down period for safety net payments as well.
# Time, seasonal and subseries plots of the H02 Cost data
pbs |> autoplot(Cost)
pbs |> gg_season(Cost)
# gg_lag() is left out: more than one series is present in pbs
# pbs |> gg_lag(Cost)
pbs |> gg_subseries(Cost)
Finally, we can plot out the data on US gasoline supplied. Again, we see seasonal effects present in this data. One thing about this time series is that the variance of the seasonal shifts is pretty small. In other words, the amount by which production swings due to seasonality is pretty consistent over time. Also, no larger outlier years jump out at us visually.
# Time, seasonal, lag and subseries plots of US gasoline Barrels
us_gas |> autoplot(Barrels)
us_gas |> gg_season(Barrels)
us_gas |> gg_lag(Barrels)
us_gas |> gg_subseries(Barrels)